******************************************************************************************
* Do-file name:	cr_region_data.do                                                        
* Task:         restrict sample, collapse data at municipality level, create outcome vars
* Last change:  07.02.2025                                                               
******************************************************************************************


******************************************************************************************
*** program setup
******************************************************************************************

* Run under Stata 14.2 semantics and start from an empty session
* (no data, no macros, no cached programs).
version 14.2
clear all
macro drop _all
discard

* Output settings and a fixed random-number seed.
set more off
set linesize 90
* set trace on
set seed 123456789


******************************************************************************************
*** load working sample and collapse at the municipality level
******************************************************************************************

*** load the treatment and control sample
use "data/work.dta", clear

** restrict sample to the years 1985-1995 (rows with a missing year are dropped too)
keep if inrange(year, 1985, 1995)

** drop value labels to save space
foreach lbl in imp_edu edu_3 tasktype {
	label drop `lbl'
}

** drop variables that are not used below to save space
drop bibbtaskthreedigit bibbtasktwodigit estsize leh_lart_id imp_lag_ao_gem  ///
     ausbild sample_indiv ausbild2 ip1
drop dauer tenure double_spell left_cens                                     ///
     tag_entg tag_entg_cens real_tag_entg real_tag_entg_cens                 ///
     wz73_imp wz73_class


******************************************************************************************
*** restrict data
******************************************************************************************

* Employed spells (status == 1) must have a district, a municipality and an FTE weight.
foreach required in ao_kreis_imp ao_gem_imp weight_fte {
	drop if status == 1 & `required' == .
}

*** medical doctors
* Drop every worker with at least one spell in occupation codes 841-844.
* Those spells are often strange, in particular in 1997 and 1998 when a lot
* of doctors suddenly disappear.
gen arzt = inrange(beruford_imp, 841, 844)
* after sorting, the last row of each person holds the person's maximum of arzt
sort vsnr_ano arzt
by vsnr_ano (arzt): replace arzt = arzt[_N]
tabulate arzt, missing
drop if arzt == 1
drop arzt

*** Apprentices
* Builds the indicator aus ("wage spell"): 1 for persons who never had a spell with
* berufstg == 0, and 1 for spells after the year the (last) apprenticeship ended.
* All helper variables created on the way are dropped again at the end of this section.
* NOTE(review): this section identifies persons by vsnr, the rest of the file by vsnr_ano
* -- confirm that both identify the same person.

* laststib: person-level flag, 1 if the person's last spell (by year) has berufstg == 0.
* It is only tabulated for the log and dropped right away.
sort vsnr year
gen laststib=0
replace laststib = 1 if berufstg == 0 & vsnr != vsnr[_n+1]
sort vsnr laststib
qui by vsnr: replace laststib = laststib[_N]
tab laststib, miss
drop laststib

** indicator for those who at least once worked as an apprentice or intern
* someapp = 1 if the smallest berufstg value observed for the person is 0
sort vsnr year
egen minstib = min(berufstg), by(vsnr)
gen someapp = 0
replace someapp = 1 if minstib == 0
drop minstib
label variable someapp "at least one spell as apprentice" 

** indicator for the end of the apprenticeship
* Rule 1: a spell with berufstg > 0 whose previous spell of the same person has
*         berufstg == 0 and lies in the same or the directly preceding year.
* Rule 2: a spell with berufstg == 0 whose next spell of the same person has
*         berufstg > 0 and lies more than one year later.
* NOTE(review): in Stata a missing value compares as larger than any number, so
* "berufstg>0" is also true for missing berufstg -- confirm this is intended.
sort vsnr year
gen ausindex = 0
replace ausindex = 1 if /* 
  */ vsnr==vsnr[_n-1]&berufstg>0&berufstg[_n-1]==0&year<=year[_n-1]+1
replace ausindex = 1 if  /*
  */ vsnr==vsnr[_n+1]&berufstg==0&berufstg[_n+1]>0&year[_n+1]>year+1
label variable ausindex "spell worker finished apprenticeship"
tab ausindex, miss

** year apprenticeship ended 
* ausyend is 0 for persons without any flagged spell; otherwise the largest flagged
* year of the person is copied to all of the person's rows (sort + last row of group).
sort vsnr year
gen ausyend=0
replace ausyend=year if ausindex==1
sort vsnr ausyend
qui by vsnr: replace ausyend=ausyend[_N]
label variable ausyend "year apprenticeship ended"
/*If more than 1 apprenticeship: year last apprenticeship ended*/
tab ausyend, miss

** indicator for wage spell: all spells for which year>year in which appr. ended, last apprenticeship for those with more than 1 apprenticeship
* persons who never were apprentices (someapp == 0) get aus = 1 on all spells
sort vsnr year
gen aus=0
replace aus=1 if year>ausyend&someapp==1&ausyend!=0
replace aus=1 if someapp==0
label variable aus "ind. for wage spell: all years>year appr. finished"

* drop unnecessary variables
drop ausyend ausindex someapp

*** citizenship information must be available
drop if nation == .

*** broad age range: keep individuals aged 16 to 65
keep if inrange(age, 16, 65)

*** among the employed (status 1) keep only regular employees subject to social insurance
*** contributions (pers_gr 101) and apprentices (pers_gr 102); status 2 and 3 are kept as they are
keep if (status == 1 & inlist(pers_gr, 101, 102)) | inlist(status, 2, 3)

*** drop military (a large NATO maneuver in 1988, and a small one in 1990, causes large
*** changes in employment in districts 9374 and 9363)
* wz73 identifies these observations until 2002; for later years (> 2003) use w03.
drop if status == 1 & inlist(wz73, 920, 921)

**** variables that are not needed from here on
drop staat staat2 nation_gr2 wz73 bnr_ano_n ao_gem beruford border_imp_ddr


******************************************************************************************
*** create variables to be used for collapsing the data
******************************************************************************************

*** nationality dummies: 1/0, left missing where nation_gr is missing
gen native = (nation_gr == 1)  if nation_gr != .
gen czech  = (nation_gr == 2)  if nation_gr != .

*** dummies for employment groups (Fitzenberger imputed education variable used for education groups)
* All employment dummies are 1 for employed members of the group (status == 1) and missing otherwise;
* only emp_nat additionally carries a 0 for employed non-natives.

** total employment
gen emp_tot = 1  if status == 1

** native employment (1 = employed native, 0 = employed non-native with known nationality)
gen emp_nat = (nation_gr == 1)  if nation_gr != . & status == 1

** czech employment
gen emp_cze = 1  if nation_gr == 2 & status == 1

** native + czech employment
gen emp_nc = 1  if emp_nat == 1 | emp_cze == 1

** native employment by 2 education groups
forvalues e = 1/2 {
	gen emp_nat_edu2`e' = 1  if nation_gr == 1 & imp_edu_2 == `e' & status == 1
}

** native employment, conditional on age being non-missing
gen emp_nat_age3 = 1  if nation_gr == 1 & inrange(age, 16, 65) & status == 1

** native employment by 3 age groups (16-29, 30-49, 50-65)
local age_lo 16 30 50
local age_hi 29 49 65
forvalues a = 1/3 {
	local lo : word `a' of `age_lo'
	local hi : word `a' of `age_hi'
	gen emp_nat_age3`a' = 1  if nation_gr == 1 & inrange(age, `lo', `hi') & status == 1
}


*** all apprentices
* employed apprentices (berufstg == 0 and pers_gr == 102) among natives (nation_gr 1)
* and Czechs (nation_gr 2)
local code_nat 1
local code_cze 2
foreach grp in nat cze {
	gen emp_`grp'_app = 1  if nation_gr == `code_`grp'' & status == 1 & berufstg == 0 & pers_gr == 102
}

*** only new apprentices
* flag the apprentice spell in the first year in which the person is observed as an apprentice
foreach grp in nat cze {
	bys vsnr_ano: egen app_year_first = min(year)  if emp_`grp'_app == 1
	gen emp_`grp'_app_first = 1  if emp_`grp'_app == 1 & year == app_year_first
	drop app_year_first
}

* pers_gr is not needed any more
drop pers_gr


*** wage outcomes (Fitzenberger imputed wages and education variables used)
* Each variable holds impy for members of the group and is missing otherwise.

** total wage
gen y_tot = impy

** native wage
gen y_nat = impy  if nation_gr == 1

** native wage by 2 education groups
forvalues e = 1/2 {
	gen y_nat_edu2`e' = impy  if nation_gr == 1 & imp_edu_2 == `e'
}

** native wage by 3 age groups (16-29, 30-49, 50-65)
local age_lo 16 30 50
local age_hi 29 49 65
forvalues a = 1/3 {
	local lo : word `a' of `age_lo'
	local hi : word `a' of `age_hi'
	gen y_nat_age3`a' = impy  if nation_gr == 1 & inrange(age, `lo', `hi')
}


*** dummies for occupational upgrading: employment groups
* Every dummy is 1 for employed members (status == 1) of the cell and missing otherwise.
* Task groups: 1 = manual/routine, 2 = abstract. Age groups: 16-29, 30-49, 50-65.
local age_lo 16 30 50
local age_hi 29 49 65

** 1) measured using occupational tasks (2-digit occupations)
** native employment, conditional on a non-missing task group
gen emp_nat_task2d = 1  if nation_gr == 1 & task_2dig_2 != . & status == 1

** native employment by 2 task groups
forvalues t = 1/2 {
	gen emp_nat_task2d2`t' = 1  if nation_gr == 1 & task_2dig_2 == `t' & status == 1
}

** native employment by 2 task groups and 3 age groups
forvalues t = 1/2 {
	forvalues a = 1/3 {
		local lo : word `a' of `age_lo'
		local hi : word `a' of `age_hi'
		gen emp_nat_task2d2`t'_age3`a' = 1  if nation_gr == 1 & task_2dig_2 == `t' & inrange(age, `lo', `hi') & status == 1
	}
}


** 2) measured using occupational tasks (3-digit occupations)
* membership condition of the three worker groups: total, native, native + czech
local who_tot "emp_tot == 1"
local who_nat "nation_gr == 1"
local who_nc  "emp_nc == 1"

** employment of each group, conditional on a non-missing task group
foreach grp in tot nat nc {
	gen emp_`grp'_task3d = 1  if `who_`grp'' & task_3dig_2 != . & status == 1
}

** employment of each group by 2 task groups
foreach grp in tot nat nc {
	forvalues t = 1/2 {
		gen emp_`grp'_task3d2`t' = 1  if `who_`grp'' & task_3dig_2 == `t' & status == 1
	}
}

** native employment by 2 task groups and 3 age groups
forvalues t = 1/2 {
	forvalues a = 1/3 {
		local lo : word `a' of `age_lo'
		local hi : word `a' of `age_hi'
		gen emp_nat_task3d2`t'_age3`a' = 1  if nation_gr == 1 & task_3dig_2 == `t' & inrange(age, `lo', `hi') & status == 1
	}
}


*** occupational upgrading: wage groups
* Each variable holds impy for natives in the cell and is missing otherwise.

** 1) measured using occupational tasks (2-digit occupations)
** native wage by 2 task groups
forvalues t = 1/2 {
	gen y_nat_task2d2`t' = impy  if nation_gr == 1 & task_2dig_2 == `t'
}


** 2) measured using occupational tasks (3-digit occupations)
** native wage, conditional on a non-missing task group
gen y_nat_task3d = impy  if nation_gr == 1 & task_3dig_2 != .

** native wage by 2 task groups
forvalues t = 1/2 {
	gen y_nat_task3d2`t' = impy  if nation_gr == 1 & task_3dig_2 == `t'
}


*** education dummies (one indicator per category, renamed to speaking names)
tabulate imp_edu_2, generate(imp_edu_2_)
rename (imp_edu_2_1 imp_edu_2_2) (imp_edu_2_low imp_edu_2_high)

tabulate imp_edu, generate(imp_edu_3_)
rename (imp_edu_3_1 imp_edu_3_2 imp_edu_3_3) (imp_edu_3_low imp_edu_3_med imp_edu_3_high)

*** task dummies
tabulate task_2dig_2, generate(task_2dig_2_)
rename (task_2dig_2_1 task_2dig_2_2) (task_2dig_2_man task_2dig_2_abst)

tabulate task_3dig_2, generate(task_3dig_2_)
rename (task_3dig_2_1 task_3dig_2_2) (task_3dig_2_man task_3dig_2_abst)

*** age categories: 3 groups (missing for ages outside the three brackets)
gen age_3 = cond(inrange(age, 16, 29), 1,   ///
            cond(inrange(age, 30, 49), 2,   ///
            cond(inrange(age, 50, 65), 3, .)))
label var age_3 "age, 3 groups: 1 = 16-29, 2 = 30-49, 3 = 50-65"

* creates the indicators age_3_1, age_3_2 and age_3_3
tabulate age_3, generate(age_3_)


*** flag the 13 districts eligible under the commuting policy and used in the treatment group in the qje paper
* 1 for the listed district codes, 0 for any other district, missing where the district is missing
gen border_imp_13 = inlist(ao_kreis_imp, 9262, 9263, 9271, 9272, 9275, 9276, 9278,   ///
                                         9361, 9363, 9371, 9372, 9374, 9376)         ///
                    if ao_kreis_imp != .
label variable border_imp_13 "1 if treat. district & used in qje treat. group, 0 otherwise"


*** recreate balanced sample
* add a row for every missing person-year combination; the added rows get status 3
fillin vsnr_ano year
replace status = 3  if _fillin == 1
drop _fillin
xtset vsnr_ano year
save "data/work_region.dta", replace


*** create outcome variables for the decomposition
* Build a person-level wide file that holds one copy per year of each variable listed below
* (e.g. emp_nat_1990), then attach it to every row of the long panel.
local wide_stubs ao_gem_imp weight_fte emp_nat emp_nat_task2d task_2dig_2 emp_nat_age3 age_3 y_nat emp_nat_task3d task_3dig_2 y_nat_task3d
keep vsnr_ano year `wide_stubs'

* append an underscore to every name so that the wide variables read <name>_<year>
local reshape_stubs
foreach v of local wide_stubs {
	rename `v' `v'_
	local reshape_stubs `reshape_stubs' `v'_
}

reshape wide `reshape_stubs', i(vsnr_ano) j(year)
save "data/ao_gem_wide.dta", replace

* back to the long panel; persons that appear only in the wide file are not kept
use "data/work_region.dta", clear
merge m:1 vsnr_ano using "data/ao_gem_wide.dta", keep(master match) nogenerate
compress


*** globals: first and last year of the analysis window
global start_year 	 = 1985
global end_year 	 = 1995


********* CODE FOR EMPLOYMENT DECOMPOSITION 
* Person-year indicators (status_dec_emp_*) that compare a native's employment and municipality
* in the current year with the same person's values in 1990. Variables ending in _1990 or _<year>
* are the person-level wide copies created by the reshape above. The indicators are summed to
* municipality level in the collapse further below.

/*  differences relative to 1990 */

* Terms 3) to 12a) depend only on the current row and on the 1990 values, so each of them is
* computed in a single pass over all rows inside the analysis window.
local in_window "inrange(year, $start_year, $end_year)"

* 1) employed native in 1990 with known municipality, no native-employment record in year i
// * this variable will be recentered to the year when outflows occurred after collapsing the data
forvalues i = $start_year/$end_year {
	gen status_dec_emp_1_`i' = (emp_nat_1990 == 1 & emp_nat_`i' == . & ao_gem_imp_1990 != .) if year == 1990
	}

* 2) employed native in 1990 and in year i, but in a different municipality in year i
// * this variable will be recentered to the year when outflows occurred after collapsing the data
forvalues i = $start_year/$end_year {
	gen status_dec_emp_2_`i' = (emp_nat_1990 == 1 & emp_nat_`i' == 1 & ao_gem_imp_1990 != ao_gem_imp_`i' & ao_gem_imp_1990 != . & ao_gem_imp_`i' != .) if year == 1990
	}

* 3) union of 4) and 5)
gen status_dec_emp_3 = ((emp_nat_1990 == 1 & emp_nat == 1 & ao_gem_imp_1990 != ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .) | ///
                        (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp != .))  if `in_window'

* 4) employed native now and in 1990, current municipality differs from the 1990 municipality
gen status_dec_emp_4 = (emp_nat_1990 == 1 & emp_nat == 1 & ao_gem_imp_1990 != ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .)  if `in_window'

* 5) employed native now, no native-employment record in 1990
gen status_dec_emp_5 = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp != .)  if `in_window'

* 6) as 5), 1990 municipality known
gen status_dec_emp_6 = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp_1990 != . & ao_gem_imp != .)  if `in_window'

* 7) as 5), 1990 municipality missing
gen status_dec_emp_7 = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp_1990 == . & ao_gem_imp != .)  if `in_window'

* 8) as 6), current municipality differs from the 1990 municipality
gen status_dec_emp_8 = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp_1990 != ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .)  if `in_window'

* 9) as 6), current municipality equals the 1990 municipality
gen status_dec_emp_9 = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp_1990 == ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .)  if `in_window'

* 10)
* change in FTE-weights of stayers (employed now and in 1990, same municipality as in 1990);
* missing for everybody else
gen status_fteweights = weight_fte - weight_fte_1990  ///
	if emp_nat == 1 & emp_nat_1990 == 1 & ao_gem_imp == ao_gem_imp_1990 & ao_gem_imp_1990 != . & ao_gem_imp != . & `in_window'

* 12a) 
// only stayers (E_r0_rt) are calculated here. The second part of term 12) (E_r0_r't) is calculated in 2) and added to this part after aggregation
gen status_dec_emp_12a = (emp_nat_1990 == 1 & emp_nat == 1 & ao_gem_imp_1990 == ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .)  if `in_window'


**** code for non-employed agg. estimation

* 11a) no native-employment record in 1990, employed native now in the municipality of 1990.
* The expression does not depend on the year, so it is computed in one pass over all rows
* inside the analysis window.
gen status_dec_emp_11a = (emp_nat_1990 == . & emp_nat == 1 & ao_gem_imp_1990 == ao_gem_imp & ao_gem_imp_1990 != . & ao_gem_imp != .)  ///
	if inrange(year, $start_year, $end_year)

* 11b) no native-employment record in 1990, employed native in year i in a municipality other
* than the one of 1990; stored on the person's 1990 row, one variable per year i
// * this variable will be recentered to the year when outflows occurred after collapsing the data
forvalues i = $start_year/$end_year {
	gen status_dec_emp_11b_`i' = (emp_nat_1990 == . & emp_nat_`i' == 1 & ao_gem_imp_1990 != ao_gem_imp_`i' & ao_gem_imp_1990 != . & ao_gem_imp_`i' != .) if year == 1990
	}


* non-employed natives (status 2 or 3) with a known municipality
gen non_emp_nat = 1  if (status == 2 | status == 3) & ao_gem_imp != . & nation_gr == 1


***** Employment decomposition by task and age groups
// run do-file "cr_dec_emp_groups_ado.do"
* Forward slashes only: they work on every operating system Stata runs on, whereas a
* backslash is a Windows-only separator and also Stata's macro escape character.
do "do-files/data_management/cr_dec_emp_groups_ado.do"

*** save dataset for collapsing and use in regional approach
compress
label data "restricted estimation sample (1985-1995)"
save "data/work_region.dta", replace


******************************************************************************************
*** create smaller dataset for descriptive statistics
******************************************************************************************

*** restrict sample
keep if year >= 1985

*** drop the person-level wide copies and the decomposition indicators
* NOTE(review): the 3-digit wide copies (emp_nat_task3d_19??, task_3dig_2_19??, y_nat_task3d_19??)
* are not in this list and therefore stay in the descriptive dataset -- confirm this is intended.
foreach stub in ao_gem_imp weight_fte emp_nat emp_nat_task2d emp_nat_age3 task_2dig_2 age_3 y_nat {
	drop `stub'_19??
}
drop status_*

*** save dataset for descriptive statistics
sort vsnr_ano year
compress
label data "sample for descriptive statistics (1985-1995)"
* replace any existing dataset notes by the two notes below
notes drop _dta
notes: sample between 1985 & 1995
notes: vsnr is classified as employed if employed at 30/06 in respective year
save "data/desc_stats.dta", replace


******************************************************************************************
*** prepare data for analysis on individual level
******************************************************************************************

use "data/work_region.dta", clear

*** flag persons in the individual analysis sample
* a person belongs to the sample if the 1990 row lies in a border or control region
gen help_1 = ((border_imp == 1 | control_imp == 1) & year == 1990)
bys vsnr_ano (year): egen sample_indiv2 = max(help_1)
label var sample_indiv2 "1 if individual belongs to individual sample"
drop help_1

*** restrict sample
keep if sample_indiv2 == 1

*** drop group-specific outcomes and decomposition indicators
drop emp_nat_edu* emp_nat_age* emp_nat_task*
drop y_nat_edu* y_nat_age?? y_nat_task*
drop status_* aus

*** correct distance variable
* every row of a municipality gets the largest distance value observed for that municipality
bysort ao_gem_imp: egen distmax = max(distance)
drop distance
rename distmax distance

*** create outcome variables on employment and wages
sort vsnr_ano year
tsset vsnr_ano year

** native employment change
* step 1: treat a missing yearly employment indicator as "not employed"
forvalues yr = 1985/1995 {
	replace emp_nat_`yr' = 0  if emp_nat_`yr' == .
}
* step 2: change relative to 1990, stored on the 1990 row of natives
forvalues yr = 1985/1995 {
	gen d_emp_nat_`yr'_90 = emp_nat_`yr' - emp_nat_1990  if year == 1990 & nation_gr == 1
}


** create yearly wage (based on imputed wages + missings replacement of status 2&3 persons in 1990 by max 4 years lag)
// y_tot, y_nat, y_for are based on impy, i.e. without the non-employed in 1990, therefore not used here, but only in the regional approach

* lags 1-5 and leads 1-5 of impy_2, stored on the 1990 row as impy2_<year>
forvalues k = 1/5 {
	local yr = 1990 - `k'
	gen impy2_`yr' = L`k'.impy_2  if year == 1990
}
forvalues k = 1/5 {
	local yr = 1990 + `k'
	gen impy2_`yr' = F`k'.impy_2  if year == 1990
}
gen impy2_1990 = impy_2  if year == 1990


** native wage growth relative to 1990, stored on the 1990 row of natives
forvalues yr = 1985/1995 {
	gen d_wage_nat_`yr'_90 = impy2_`yr' - impy2_1990  if year == 1990 & nation_gr == 1
}
replace d_wage_nat_1990_90 = 0  if year == 1990 & d_wage_nat_1990_90 != .


*** drop wide variables (not needed anymore)
drop weight_fte_19?? y_nat_19??


*** save dataset for individual analysis
sort vsnr_ano year
compress
label data "estimation sample for indiv. analysis (1985-1995)"
notes drop _dta
notes: sample between 1985 & 1995
notes: vsnr is classified as employed if employed at 30/06 in respective year
save "data/work_indiv.dta", replace


******************************************************************************************
*** collapse data on municipality level: employment sample
******************************************************************************************

** create datasets for sub-group decomposition
* Each sample z is described by two globals: restric_employ_z (row filter applied when
* loading the data) and weight_z (weight variable used in the collapse).

* keep only analysis regions
global base_restric_ep "(border_imp == 1 | control_imp == 1)"

* both samples use the same row filter
forvalues s = 1/2 {
	global restric_employ_`s' "${base_restric_ep}"
}

* 1) All workers: FTE weights
global weight_1 "weight_fte"

* 2) All non-employed workers: FTE weights extended to the non-employed
global weight_2 "weight_fte_2"

foreach z of numlist 1 2  {

* load only the rows that satisfy the row filter of sample `z'
use if ${restric_employ_`z'} using "data/work_region.dta", clear	

** drop wide variables (not needed anymore and would not collapse correctly)
* NOTE(review): emp_nat_task3d_19?? is not in this list although it also matches the emp_*
* wildcard of the (sum) block below, so unless it was dropped elsewhere its yearly copies
* are summed into the municipality file -- confirm whether it should be dropped here too.
drop ao_gem_imp_19?? weight_fte_19?? emp_nat_19?? emp_nat_task2d_19?? task_2dig_2_19?? emp_nat_age3_19?? age_3_19?? y_nat_19??
sort vsnr_ano year

** create weight for fte including non-employed individuals
* weight_fte_2 equals weight_fte for employed rows and 1 for all rows with status != 1
gen 	weight_fte_2 = weight_fte
replace weight_fte_2 = 1  if status != 1

** municipality level: main variables (as we use FTE weights only employed individuals are considered (status 1), some employed individ. have missings for FTE weights)
// for sample 2) non-employed are included using weight_fte_2
* One row per municipality and year: weighted means of the characteristics, weighted sums of
* the counts and decomposition indicators, and an unweighted sum (rawsum) of status_ftew*.
collapse (mean)    ao_kreis_imp weight_matching border_imp border_imp_13 control_imp ost distance ktyp ///
				   female imp_edu_2_low imp_edu_2_high imp_edu_3_low imp_edu_3_med imp_edu_3_high ///
				   task_2dig_2_man task_2dig_2_abst task_3dig_2_man task_3dig_2_abst ///
				   age_3_1 age_3_2 age_3_3 impy /// 
		  (sum)    czech native emp_* non_emp_nat ///
				   status_dec_* ///
		  (rawsum) status_ftew*  [iw=${weight_`z'}], by (ao_gem_imp year)	

** set panel dimension
fillin ao_gem_imp year
xtset  ao_gem_imp year
sort   ao_gem_imp year


***** re-centre the decomposition terms that were stored on the 1990 row
* For every stub below the collapsed data hold one variable per target year i
* (status_dec_<stub>_<i>) whose value sits on the 1990 row of the municipality. The loop
* copies the 1990 value of variable i to the row of year i and then drops the yearly helpers.
*   emp_1 emp_2       : employment decomposition
*   etr_* eta_* eay_* eam_* eao_* : employment decomposition by task and age groups
*   emp_11b           : non-employed
local recenter emp_1 emp_2 etr_3 eta_3 eao_3 etr_4 eta_4 eay_4 eam_4 eao_4 etr_8 eta_10 emp_11b
foreach stub of local recenter {
	gen status_dec_`stub' = .
	forvalues i = $start_year/$end_year {
		bys ao_gem_imp: egen status_dec_`stub'_`i'_max = total(cond(year == 1990, status_dec_`stub'_`i', .))
		replace status_dec_`stub' = status_dec_`stub'_`i'_max  if year == `i'
	}
	drop status_dec_`stub'_*
}


* keep only analysis regions
* NOTE(review): rows added by -fillin- above have missing border_imp and control_imp and are
* therefore removed again by this filter -- confirm that this is intended.
keep if border_imp == 1 | control_imp == 1


******************************************************************************************
*** correct variables
******************************************************************************************

*** correct distance variable
* every row of a municipality gets the largest distance value observed for that municipality
bysort ao_gem_imp: egen distmax = max(distance)
drop distance
rename distmax distance

*** save dataset for estimating first stage (sample 1 only)
* the sample number is written to the log for both samples
display `z'
if `z' == 1 {
	save "data/work_first_stage_temp.dta", replace
}


******************************************************************************************
*** calculate outcome variables for employment growth
******************************************************************************************

sort ao_gem_imp year

*** growth of every employment count in year X compared to 1990
* For each count variable v in the list below:
*   v_90  = the municipality's 1990 value of v, copied to all years
*   g_v   = (v - v_90) / v_90, set to 0 in 1990 (a zero or missing 1990 base gives a missing
*           growth rate in all other years)
* The list is ordered exactly as the variables are to appear in the dataset.

* total, native, native + czech employment
local base_vars emp_tot emp_nat emp_nc
* native employment by education group
local base_vars `base_vars' emp_nat_edu21 emp_nat_edu22
* native employment conditional on a non-missing task group (2- and 3-digit occupations)
local base_vars `base_vars' emp_nat_task2d emp_nat_task3d
* native employment by task group (2- and 3-digit occupations)
local base_vars `base_vars' emp_nat_task2d21 emp_nat_task2d22 emp_nat_task3d21 emp_nat_task3d22
* total and native + czech employment conditional on a non-missing task group (3-digit only)
local base_vars `base_vars' emp_tot_task3d emp_nc_task3d
* total and native + czech employment by task group (3-digit only)
local base_vars `base_vars' emp_tot_task3d21 emp_tot_task3d22 emp_nc_task3d21 emp_nc_task3d22
* native employment conditional on non-missing age, and by age group
local base_vars `base_vars' emp_nat_age3 emp_nat_age31 emp_nat_age32 emp_nat_age33
* native employment by task group and age group (2- and 3-digit occupations)
foreach d in 2 3 {
	foreach t in 21 22 {
		foreach a in 31 32 33 {
			local base_vars `base_vars' emp_nat_task`d'd`t'_age`a'
		}
	}
}
* native non-employed: only the 1990 level is needed, no growth rate
local base_vars `base_vars' non_emp_nat
* apprenticeships (native, czech): all apprentices, then only new apprentices
local base_vars `base_vars' emp_nat_app emp_cze_app emp_nat_app_first emp_cze_app_first

foreach v of local base_vars {
	* 1990 value on the 1990 row only, then spread over all years of the municipality
	tempvar level90
	gen `level90' = `v'  if year == 1990
	bys ao_gem_imp: egen `v'_90 = max(`level90')
	drop `level90'

	if "`v'" != "non_emp_nat" {
		gen     g_`v' = (`v' - `v'_90) / `v'_90
		replace g_`v' = 0  if year == 1990
	}
}


******************************************************************************************
*** calculate outcome variables for decomposition
******************************************************************************************

***** 1) employment decomposition

** status_dec_emp_12 is the sum of status_dec_emp_12a and status_dec_emp_2
generate status_dec_emp_12 = status_dec_emp_12a + status_dec_emp_2

** components 1-9 and the fte-weight component, each divided by emp_nat_90
forvalues c = 1/9 {
	generate dec_emp_`c' = status_dec_emp_`c' / emp_nat_90
}
generate dec_emp_fteweights = status_fteweights / emp_nat_90

foreach c in 12 12a {
	generate dec_emp_`c' = status_dec_emp_`c' / emp_nat_90
}

*** for non-employed workers
** status_dec_emp_11 is the sum of status_dec_emp_11a and status_dec_emp_11b
generate status_dec_emp_11 = status_dec_emp_11a + status_dec_emp_11b

** these components are divided by non_emp_nat_90 instead of emp_nat_90
foreach c in 11 11a 11b {
	generate dec_emp_`c' = status_dec_emp_`c' / non_emp_nat_90
}


*** employed workers by task groups

** 1) routine worker in 1990
* every component below is divided by emp_nat_task3d21_90 (E^0_rR)
local den_r emp_nat_task3d21_90

* 1)   ((E^t_rR - E^0_rR) / E^0_rR)
generate dec_etr_1 = (emp_nat_task3d21 - `den_r') / `den_r'
label variable dec_etr_1 "empl. growth, routine worker"

* 2)  E_rR_N / E^0_rR
generate dec_etr_2 = status_dec_etr_3 / `den_r'
label variable dec_etr_2 "displacement effect, routine worker"

* 3)  E_rR_r'R / E^0_rR
generate dec_etr_3 = status_dec_etr_4 / `den_r'
label variable dec_etr_3 "relocation effect, routine worker"

* 4)  (E_r'R_rR + E_N_rR) / E^0_rR
generate dec_etr_4 = (status_dec_etr_5 + status_dec_etr_6) / `den_r'
label variable dec_etr_4 "inflow effect, routine worker"

* 5)  (E_rR_rA + E_rR_r'A) / E^0_rR, and its two parts 5a) E_rR_rA and 5b) E_rR_r'A
generate dec_etr_5 = (status_dec_etr_7 + status_dec_etr_8) / `den_r'
label variable dec_etr_5 "upgrade effect, routine worker"

generate dec_etr_5a = status_dec_etr_7 / `den_r'
label variable dec_etr_5a "upgrade effect (r,r), routine worker"

generate dec_etr_5b = status_dec_etr_8 / `den_r'
label variable dec_etr_5b "upgrade effect (r,r'), routine worker"

* 6)  (E_rA_rR + E_r'A_rR) / E^0_rR, and its two parts 6a) E_rA_rR and 6b) E_r'A_rR
generate dec_etr_6 = (status_dec_etr_9 + status_dec_etr_10) / `den_r'
label variable dec_etr_6 "downgrade effect, routine worker"

generate dec_etr_6a = (status_dec_etr_9) / `den_r'
label variable dec_etr_6a "downgrade effect (r,r), routine worker"

generate dec_etr_6b = (status_dec_etr_10) / `den_r'
label variable dec_etr_6b "downgrade effect (r',r), routine worker"

* 7)  (E_rR_r'R + E_rR_r'A) / E^0_rR
generate dec_etr_7 = (status_dec_etr_4 + status_dec_etr_8) / `den_r'
label variable dec_etr_7 "relocation effect (L), routine worker"

* 8)  (E_r'R_rR + E_N_rR + E_r'A_rR) / E^0_rR
generate dec_etr_8 = (status_dec_etr_5 + status_dec_etr_6 + status_dec_etr_10) / `den_r'
label variable dec_etr_8 "inflow effect (L), routine worker"

* 9)  (E_rR_rR + E_rR_r'R) / E^0_rR
generate dec_etr_9 = (status_dec_etr_11 + status_dec_etr_4) / `den_r'
label variable dec_etr_9 "displacement effect (old), routine worker"

** 2) abstract worker in 1990
* every component below is divided by emp_nat_task3d22_90 (E^0_rA)
local den_a emp_nat_task3d22_90

* 1)   ((E^t_rA - E^0_rA) / E^0_rA)
generate dec_eta_1 = (emp_nat_task3d22 - `den_a') / `den_a'
label variable dec_eta_1 "empl. growth, abstract worker"

* 2)  E_rA_N / E^0_rA
generate dec_eta_2 = status_dec_eta_3 / `den_a'
label variable dec_eta_2 "displacement effect, abstract worker"

* 3)  E_rA_r'A / E^0_rA
generate dec_eta_3 = status_dec_eta_4 / `den_a'
label variable dec_eta_3 "relocation effect, abstract worker"

* 4)  (E_r'A_rA + E_N_rA) / E^0_rA
generate dec_eta_4 = (status_dec_eta_5 + status_dec_eta_6) / `den_a'
label variable dec_eta_4 "inflow effect, abstract worker"

* 5)  (E_rR_rA + E_r'R_rA) / E^0_rA, and its two parts 5a) E_rR_rA and 5b) E_r'R_rA
generate dec_eta_5 = (status_dec_eta_7 + status_dec_eta_8) / `den_a'
label variable dec_eta_5 "upgrade effect, abstract worker"

generate dec_eta_5a = status_dec_eta_7 / `den_a'
label variable dec_eta_5a "upgrade effect (r,r), abstract worker"

generate dec_eta_5b = status_dec_eta_8 / `den_a'
label variable dec_eta_5b "upgrade effect (r',r), abstract worker"

* 6)  (E_rA_rR + E_rA_r'R) / E^0_rA, and its two parts 6a) E_rA_rR and 6b) E_rA_r'R
generate dec_eta_6 = (status_dec_eta_9 + status_dec_eta_10) / `den_a'
label variable dec_eta_6 "downgrade effect, abstract worker"

generate dec_eta_6a = (status_dec_eta_9) / `den_a'
label variable dec_eta_6a "downgrade effect (r,r), abstract worker"

generate dec_eta_6b = (status_dec_eta_10) / `den_a'
label variable dec_eta_6b "downgrade effect (r,r'), abstract worker"

* 7)  (E_rA_r'A + E_rA_r'R) / E^0_rA
generate dec_eta_7 = (status_dec_eta_4 + status_dec_eta_10) / `den_a'
label variable dec_eta_7 "relocation effect (L), abstract worker"

* 8)  (E_r'A_rA + E_N_rA + E_r'R_rA) / E^0_rA
generate dec_eta_8 = (status_dec_eta_5 + status_dec_eta_6 + status_dec_eta_8) / `den_a'
label variable dec_eta_8 "inflow effect (L), abstract worker"

* 9)  (E_rA_rA + E_rA_r'A) / E^0_rA
generate dec_eta_9 = (status_dec_eta_11 + status_dec_eta_4) / `den_a'
label variable dec_eta_9 "displacement effect (old), abstract worker"


*** employed workers by age groups

** 3) old age worker in 1990
* every component below is divided by emp_nat_age33_90 (E^0_rO)
local den_o emp_nat_age33_90

* 1)   ((E^t_rO - E^0_rO) / E^0_rO)
generate dec_eao_1 = (emp_nat_age33 - `den_o') / `den_o'
label variable dec_eao_1 "empl. growth, old age worker"

* 2)  E_rO_N / E^0_rO
generate dec_eao_2 = status_dec_eao_3 / `den_o'
label variable dec_eao_2 "displacement effect, old age worker"

* 3)  E_rO_r'O / E^0_rO
generate dec_eao_3 = status_dec_eao_4 / `den_o'
label variable dec_eao_3 "relocation effect, old age worker"

* 4)  (E_r'O_rO + E_N_rO) / E^0_rO
generate dec_eao_4 = (status_dec_eao_5 + status_dec_eao_6) / `den_o'
label variable dec_eao_4 "inflow effect, old age worker"

* 5)  (E_rM_rO + E_r'M_rO) / E^0_rO
generate dec_eao_5 = (status_dec_eao_7 + status_dec_eao_8) / `den_o'
label variable dec_eao_5 "upgrade effect, old age worker"

* 6)  (E_rO_rO + E_rO_r'O) / E^0_rO
generate dec_eao_6 = (status_dec_eao_9 + status_dec_eao_4) / `den_o'
label variable dec_eao_6 "displacement effect (old), old age worker"

******************************************************************************************
*** save employment estimation sample
******************************************************************************************

* Final ordering by ao_gem_imp and year, then shrink storage types before saving.
sort ao_gem_imp year
compress
label data "estimation sample for agg. employment analysis (1985-1995)"
* Remove existing dataset-level notes and attach fresh ones. The second note expands the
* global restric_employ_`z'; `z' is set outside this section (presumably the index of the
* enclosing sample loop - confirm against the loop header).
notes drop _dta
notes: sample between 1985 & 1995, only workers included
notes: Sample restrictions: ${restric_employ_`z'}
notes: vsnr is classified as employed if employed at 30/06 in respective year
* One output file per value of `z'; an existing file is overwritten.
save "data/employ_region_s`z'.dta", replace
	}


******************************************************************************************
*** create outcome variables for wage decomposition
******************************************************************************************

use "data/work_region.dta", clear
drop status_dec_e* status_fteweights

*** define RHS components of wage decomposition:

*** globals: first and last year of the year loops (also used after the collapse)
global start_year 1985
global end_year   1995

** marker (1 / missing) for native workers with a wage, i.e. y_nat != .
generate emp_nat_wage = 1 if y_nat != .

** native wage conditional on age: residuals from a quadratic in age (resid1)
** and from a full set of age indicators (resid2)
regress y_nat c.age##c.age
predict y_nat_resid1 if e(sample), residuals

regress y_nat i.age
predict y_nat_resid2 if e(sample), residuals


/*  differences relative to 1990 */

* group conditions, comparing the current observation with the worker's 1990 values:
*   stayer : wage observed in 1990 and now, same ao_gem_imp in both
*   inflow : wage observed now, and either a different ao_gem_imp in 1990 or no wage in 1990
local observed "y_nat_1990 != . & y_nat != . & ao_gem_imp_1990 != . & ao_gem_imp != ."
local stayer   "`observed' & ao_gem_imp_1990 == ao_gem_imp"
local inflow   "(`observed' & ao_gem_imp_1990 != ao_gem_imp) | (y_nat_1990 == . & y_nat != . & ao_gem_imp != .)"

** employment groups:
* 1) stayers and 2) inflows: 0/1 indicator on every observation of the year window
* 3) outflows: one 0/1 indicator per target year, defined on the 1990 observation only
*    (wage in 1990, and in the target year either a different ao_gem_imp or no wage);
*    this variable will be recentered to the year when outflows occurred after collapsing the data
generate status_dec_wage_emp_share_1 = .
generate status_dec_wage_emp_share_2 = .
forvalues yr = ${start_year}/${end_year} {
	replace status_dec_wage_emp_share_1 = (`stayer') if year == `yr'
	replace status_dec_wage_emp_share_2 = (`inflow') if year == `yr'

	local outflow "(y_nat_1990 != . & y_nat_`yr' != . & ao_gem_imp_1990 != ao_gem_imp_`yr' & ao_gem_imp_1990 != . & ao_gem_imp_`yr' != .) | (y_nat_1990 != . & y_nat_`yr' == . & ao_gem_imp_1990 != .)"
	generate status_dec_wage_emp_share_3_`yr' = (`outflow') if year == 1990
}

** wage groups:
* 1) current wage of stayers, 2) 1990 wage of stayers, 3) current wage of inflows
* 4) wage on the 1990 observation of outflows, one variable per target year;
*    this variable will be recentered to the year when outflows occurred after collapsing the data
generate status_dec_wage_1 = .
generate status_dec_wage_2 = .
generate status_dec_wage_3 = .
forvalues yr = ${start_year}/${end_year} {
	replace status_dec_wage_1 = y_nat      if (`stayer') & year == `yr'
	replace status_dec_wage_2 = y_nat_1990 if (`stayer') & year == `yr'
	replace status_dec_wage_3 = y_nat      if (`inflow') & year == `yr'

	local outflow "(y_nat_1990 != . & y_nat_`yr' != . & ao_gem_imp_1990 != ao_gem_imp_`yr' & ao_gem_imp_1990 != . & ao_gem_imp_`yr' != .) | (y_nat_1990 != . & y_nat_`yr' == . & ao_gem_imp_1990 != .)"
	generate status_dec_wage_4_`yr' = y_nat if (`outflow') & year == 1990
}

save "data/work_region_wage.dta", replace


******************************************************************************************
*** prepare data for collapse to create wage growth samples
******************************************************************************************

* load only observations with border_imp == 1 or control_imp == 1
use if (border_imp == 1 | control_imp == 1) using "data/work_region_wage.dta", clear

** drop wide variables (not needed anymore and would not collapse correctly)
drop ao_gem_imp_19??  weight_fte_19??  emp_nat_19??  emp_nat_task2d_19??  task_2dig_2_19??  emp_nat_age3_19??  age_3_19??  y_nat_19??
sort vsnr_ano year


******************************************************************************************
*** collapse data on municipality level: wage sample
******************************************************************************************

* restrict to observations with a non-missing impy
keep  if impy != . 		// keep only full-time worker and employed worker (i.e. not restricting on impy_2)

sort vsnr_ano year 

*** save the restricted worker-level data
* NOTE: this overwrites data/work_region_wage.dta with the restricted sample; the file is
* reloaded from here for each sub-sample before collapsing
save "data/work_region_wage.dta", replace

** ado file to create samples for pseudo-panel regressions
* the path uses forward slashes only: Stata accepts them on every platform, whereas a
* backslash separator is not understood on macOS/Linux and would stop the do-file here
do "do-files/data_management/cr_pseudo_panel_ado.do"

** create datasets for sub-group decomposition
* set globals
* keep only analysis regions
global base_restric_wa "(border_imp == 1 | control_imp == 1)"

* 1) All workers
global restric_wage_1 "$base_restric_wa"


*** reload original sample			// full and sub-samples
foreach w of numlist 1 {

* load the worker-level wage data, restricted by the condition stored in the global
* restric_wage_`w'
use if ${restric_wage_`w'} using "data/work_region_wage.dta", clear	

** municipality level
* One row per ao_gem_imp x year:
*   (mean) region characteristics, worker shares, wages (impy, y_*) and the wage
*          components status_dec_wage_? / status_dec_wage_?_????
*   (sum)  czech, native, emp_* and the employment-share indicators
* weight_fte enters as importance weight [iw].
collapse (mean) ao_kreis_imp weight_matching border_imp border_imp_13 control_imp ost distance ktyp ///
				female imp_edu_2_low imp_edu_2_high imp_edu_3_low imp_edu_3_med imp_edu_3_high ///
				task_2dig_2_man task_2dig_2_abst task_3dig_2_man task_3dig_2_abst ///
				age_3_1 age_3_2 age_3_3 ///
				impy y_* ///
				status_dec_wage_? status_dec_wage_?_???? /// 
		  (sum) czech native emp_* ///
				status_dec_wage_emp_share_*  [iw=weight_fte], by (ao_gem_imp year)	

** set panel dimension
* fillin adds a row for every ao_gem_imp x year combination that is absent from the
* data (all other variables missing) and creates the marker variable _fillin
fillin ao_gem_imp year
xtset  ao_gem_imp year
sort   ao_gem_imp year


***** code for decomp
* Recenter the outflow measures: the value for target year t is stored on the 1990 row of
* each ao_gem_imp in the variable with suffix _t. It is spread over the municipality with
* egen total() and then written to the row of year t.

* outflow counts
generate status_dec_wage_emp_share_3 = .
forvalues yr = ${start_year}/${end_year} {
	tempvar at1990
	bysort ao_gem_imp: egen `at1990' = total(cond(year == 1990, status_dec_wage_emp_share_3_`yr', .))
	replace status_dec_wage_emp_share_3 = `at1990' if year == `yr'
	drop `at1990'
}

* wages of outflows
generate status_dec_wage_4 = .
forvalues yr = ${start_year}/${end_year} {
	tempvar at1990
	bysort ao_gem_imp: egen `at1990' = total(cond(year == 1990, status_dec_wage_4_`yr', .))
	replace status_dec_wage_4 = `at1990' if year == `yr'
	drop `at1990'
}

* the year-specific source variables are no longer needed
drop status_dec_wage_4_*
drop status_dec_wage_emp_share_3_*

* base year: a missing inflow wage is set to zero
replace status_dec_wage_3 = 0 if year == 1990 & status_dec_wage_3 == .


* keep only rows with border_imp == 1 or control_imp == 1
keep if border_imp == 1 | control_imp == 1


******************************************************************************************
*** correct variables
******************************************************************************************

*** correct distance variable
* replace distance by its maximum within ao_gem_imp, so it is constant across years
bysort ao_gem_imp: egen distance_gem = max(distance)
drop distance
rename distance_gem distance


******************************************************************************************
*** calculate outcome variables for wage growth
******************************************************************************************

sort ao_gem_imp year

*** employment in 1990, to be used as weight in regressions
* For each employment count below, <name>_90 holds the 1990 value of <name> on every
* row of the same ao_gem_imp. Covered, in this order:
*   all workers (native, total), native by 2 education groups, native by 3 age groups,
*   native by occupational task group (2- and 3-digit)
foreach lvl in emp_nat emp_tot                             ///
               emp_nat_edu21 emp_nat_edu22                 ///
               emp_nat_age31 emp_nat_age32 emp_nat_age33   ///
               emp_nat_task2d21 emp_nat_task2d22           ///
               emp_nat_task3d21 emp_nat_task3d22 {
	tempvar only90
	generate `only90' = `lvl' if year == 1990
	bysort ao_gem_imp: egen `lvl'_90 = max(`only90')
	drop `only90'
}

*** wage growth in year X compared to 1990
* For each wage variable below, <name>_90 holds its 1990 value on every row of the same
* ao_gem_imp and g_<name> is the difference to that value (zero in 1990 itself).
* Covered, in this order:
*   all workers (native, total), native by 2 education groups, native by 3 age groups,
*   native by occupational task group (2- and 3-digit)
foreach wage in y_nat y_tot                            ///
                y_nat_edu21 y_nat_edu22                ///
                y_nat_age31 y_nat_age32 y_nat_age33    ///
                y_nat_task2d21 y_nat_task2d22          ///
                y_nat_task3d21 y_nat_task3d22 {
	tempvar only90
	generate `only90' = `wage' if year == 1990
	bysort ao_gem_imp: egen `wage'_90 = max(`only90')
	drop `only90'

	generate g_`wage' = `wage' - `wage'_90
	replace  g_`wage' = 0 if year == 1990
}


******************************************************************************************
*** calculate outcome variables for decomposition
******************************************************************************************

* short handles for the four wage components
local stay_t status_dec_wage_1
local stay_0 status_dec_wage_2
local in_t   status_dec_wage_3
local out_0  status_dec_wage_4

*** label status variables
label variable `stay_t' "wage stayers in t: ln w^t_r,r"
label variable `stay_0' "wage stayers in 0: ln w^0_r,r"
label variable `in_t'   "wage inflows in t: ln w^t_{r',N},r"
label variable `out_0'  "wage outflows in 0: ln w^0_r,{r',N}"

*** calculate employment shares for equation 1
* stayers and inflows relative to emp_nat, outflows relative to emp_nat_90
generate dec_wage_emp_share_1 = status_dec_wage_emp_share_1 / emp_nat
generate dec_wage_emp_share_2 = status_dec_wage_emp_share_2 / emp_nat
generate dec_wage_emp_share_3 = status_dec_wage_emp_share_3 / emp_nat_90

label variable dec_wage_emp_share_1 "empl. share stayer: E^t_r0_rt / E_rt"
label variable dec_wage_emp_share_2 "empl. share inflows: (E^t_r'0_rt + E^t_N0_rt) / E_rt"
label variable dec_wage_emp_share_3 "empl. share outflows: (E^0_r0_r't + E^0_r0_Nt) / E_r0"

*** calculate outcomes for equation 1
* wage change of stayers, plus two terms that weight a wage gap to stayers by the
* matching employment share (inflows in t, outflows in 0)
generate dec_wage_1 = `stay_t' - `stay_0'
generate dec_wage_2 = (`in_t' - `stay_t') * dec_wage_emp_share_2
generate dec_wage_3 = (`out_0' - `stay_0') * dec_wage_emp_share_3

label variable dec_wage_1 "price effect: ln w^t_r,r - ln w^0_r,r"
label variable dec_wage_2 "selec. effect 1: (ln w^t_{r',N},r - ln w^t_r,r) * emp_sh"
label variable dec_wage_3 "selec. effect 2: (ln w^0_r,{r',N} - ln w^0_r,r) * emp_sh"

*** calculate outcomes for equation 1: compact version (selection effect as one component)
generate dec_wage_1_c = dec_wage_1
generate dec_wage_2_c = dec_wage_2 - dec_wage_3

label variable dec_wage_1_c "price effect: dec_wage_1"
label variable dec_wage_2_c "selec. effect: dec_wage_2 - dec_wage_3"


******************************************************************************************
*** save wage estimation sample
******************************************************************************************

* Final ordering by ao_gem_imp and year, then shrink storage types before saving.
sort ao_gem_imp year
compress
label data "estimation sample for agg. wage growth analysis (1985-1995)"
* Remove existing dataset-level notes and attach fresh ones; the second note records the
* restriction stored in the global restric_wage_`w' for the current sample `w'.
notes drop _dta
notes: sample between 1985 & 1995, only workers included
notes: Sample restrictions: ${restric_wage_`w'}
notes: vsnr is classified as employed if employed at 30/06 in respective year
* One output file per value of `w'; an existing file is overwritten.
save "data/wage_region_s`w'.dta", replace
	}


******************************************************************************************
*** delete data sets
******************************************************************************************

* Remove the intermediate files used by this do-file. All paths use forward slashes, like
* every other path in the file: Stata accepts them on all platforms, whereas a backslash
* separator is not understood on macOS/Linux and makes erase fail with "file not found".
erase "data/work.dta"
erase "data/work_region.dta"
erase "data/work_region_wage.dta"
erase "data/ao_gem_wide.dta"


******************************************************************************************
*** end
******************************************************************************************

exit


*========================================================================================*
Comments:
- unique identifier: vsnr_ano year
